---
title: "Kenya National Census 2019"
author: "Antony Rono"
output:
  flexdashboard::flex_dashboard:
    #theme: lumen
    orientation: rows
    vertical_layout: fill
    #runtime: shiny
    source_code: embed
editor_options:
  chunk_output_type: console
---
```{r setup, include=FALSE}
# Chunk defaults for the whole document: do not echo code and suppress
# warnings and messages in the rendered output.
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
#knitr::opts_chunk$set(fig.width=12, fig.height=10)
# Strongly prefer fixed over scientific notation when numbers are printed.
options(scipen = 99)
library(tidyverse)
library(tidytuesdayR)
library(scales)
# NOTE(review): tidyverse is already attached above; this second call is a
# harmless duplicate.
library(tidyverse)
library(janitor)
library(ggrepel)
library(ggthemes)
library(plotly)
library(ggpubr)
library(RColorBrewer)
library(rKenyaCensus)
library(patchwork)
library(sp)
library(sf)
library(flexdashboard)
library(here)
# Evaluate the remaining chunks with the "dashboards" directory (resolved by
# here()) as knitr's root directory.
knitr::opts_knit$set(root.dir = here("dashboards"))
#setwd( here("dashboards"))
# Use theme_light() as the default ggplot2 theme for every plot below.
theme_set(theme_light())
```
```{r Load, include=FALSE}
#tt <- tt_load("2021-01-19")
# tt %>%
#
# map(glimpse)
# Download the three raw TidyTuesday (2021-01-19) tables.
gender <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-19/gender.csv")
crops <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-19/crops.csv")
households <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-19/households.csv")

# Keep the untouched tables together, mirroring the shape tt_load() would give.
tt <- list(gender = gender, crops = crops, households = households)

# Gender table: snake_case column names, title-cased county names.
gender <- mutate(clean_names(tt$gender), county = str_to_title(county))

# Household table: insert a space at the first lower-to-upper case boundary
# (presumably run-together two-word county names -- confirm against the data),
# then title-case and trim surrounding whitespace.
household <- tt$households %>%
  clean_names() %>%
  mutate(
    county = county %>%
      str_replace("([a-z])([A-Z])", "\\1 \\2") %>%
      str_to_title() %>%
      str_trim()
  )

# Crops table: its sub_county column is used as the county key from here on.
crops <- tt$crops %>%
  clean_names() %>%
  rename(county = sub_county) %>%
  mutate(county = str_to_title(county))
```
Overall {data-icon="fa-globe"}
=======================================================================
Row {data-width=80}
-----------------------------------------------------------------------
### Total Population
```{r Overall population}
# National population for the headline value box.
# The national total is the row labelled "Kenya" (the same label the county
# plots filter out); select it by name instead of relying on it being row 1
# and on population being column 2.
population <- household %>%
  filter(county == "Kenya") %>%
  pull(population) %>%
  comma()
valueBox(
  value = population,
  caption = "Population",
  icon = "fa-user-plus",
  color = "green"
)
```
### Average Household Size
```{r HH Size}
# National average household size for the headline value box.
# Pull a plain scalar from the "Kenya" row by column name; the previous
# positional lookup handed valueBox() a 1x1 tibble and depended on the column
# being last and the national row being first.
hh_size <- household %>%
  filter(county == "Kenya") %>%
  pull(average_household_size)
valueBox(
  value = hh_size,
  caption = "Average Household Size",
  icon = "fa-house-user",
  color = "orange"
)
```
### Number of Counties
```{r No. of counties}
# Every row of `household` except one (the national "Kenya" total) is a county.
county_count <- nrow(household) - 1
valueBox(
  value = county_count,
  caption = "Number of counties",
  icon = "fa-map-marker-alt",
  color = "coral"
)
```
Row {.tabset .tabset-fade}
-----------------------------------------------------------------------
### Population per county - Top 25
```{r Population per county - Top 25}
# Draw a column chart of population per county.
#
# data: a data frame with `county` and `population` columns. Any row labelled
#       "Kenya" (the national total) is dropped before plotting.
# Returns a ggplot object with counties ordered from most to least populous.
plot_by_county <- function(data) {
  data %>%
    filter(county != "Kenya") %>%
    mutate(county = fct_reorder(county, population, sum, .desc = TRUE)) %>%
    ggplot(aes(population, county, fill = county)) +
    geom_col() +
    # `labels` spelled out in full; `label` only worked via partial matching.
    scale_x_continuous(expand = c(0, 0), labels = comma) +
    # Flip so counties run along the horizontal axis.
    coord_flip() +
    theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 60, hjust = 1)
    )
}

# Top 25 counties: remove the national row first so `n` matches the tab title
# instead of asking for 26 rows and relying on "Kenya" being one of them.
p <- household %>%
  filter(county != "Kenya") %>%
  slice_max(population, n = 25) %>%
  plot_by_county()
ggplotly(p, tooltip = c("x", "y"))
```
### Population per county - Bottom 22
```{r Population per county - Bottom 22}
# The 22 least populous rows of `household`, drawn with the shared
# plot_by_county() helper and made interactive.
smallest_counties <- slice_min(household, population, n = 22)
p <- plot_by_county(smallest_counties)
ggplotly(p, tooltip = c("x", "y"))
```
Row
-----------------------------------------------------------------------
### HH Size by County
```{r Visualize Households, dpi = 200}
# Scatter of average household size against population, one labelled point
# per county (the national "Kenya" row is excluded).
p <- household %>%
  filter(county != "Kenya") %>%
  ggplot(aes(population, average_household_size, text = county)) +
  # Plain points rather than geom_jitter(): jitter moved each marker randomly
  # away from its label and from the true value reported in the tooltip.
  geom_point() +
  geom_text(aes(label = county), size = 2.5) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = comma)

# plotly only accepts "<top|middle|bottom> <left|center|right>" for
# textposition; a bare "right" is not one of the accepted values.
ggplotly(p, tooltip = c("x", "y", "text")) %>%
  style(textposition = "middle right")
# ggtitle("HH Size by County")
```
Gender Distribution {data-orientation=columns data-icon="fa-venus-mars"}
=======================================================================
Column {data-width=500}
-----------------------------------------------------------------------
### Population Distribution by County and Sex
```{r Visualize Gender by sex and county}
# Stacked bars of population by sex for every county (the "Total" row is
# excluded). Counties are ordered by their total population.
p <- gender %>%
  filter(county != "Total") %>%
  pivot_longer(
    cols = c(male, female, intersex),
    names_to = "gender",
    values_to = "Population"
  ) %>%
  mutate(
    county = fct_reorder(county, total, sum),
    # `.desc` (with the dot) is fct_reorder()'s own argument. A bare `desc`
    # is forwarded to sum() and added to the total, leaving the order unchanged.
    gender = fct_reorder(gender, Population, sum, .desc = TRUE)
  ) %>%
  ggplot(aes(Population, county, fill = gender)) +
  scale_x_continuous(labels = comma, expand = c(0, 0)) +
  geom_col()
# labs(title = "Population Distribution by Counties and Sex",
# subtitle = "The proportion of sex seems to be equal in all counties")
ggplotly(p)
```
Column {data-width=500}
-----------------------------------------------------------------------
### Proportion of gender - Overall
```{r Overall populatio}
# Donut chart of the national split by sex, built from the "Total" row.
# plotly derives the slice percentages from `values`, so no share column is
# computed here (the previous `pct` column was dropped before plotting).
gender %>%
  filter(county == "Total") %>%
  pivot_longer(
    cols = c(male, female, intersex),
    names_to = "gender",
    values_to = "Population"
  ) %>%
  select(gender, Population) %>%
  plot_ly(labels = ~gender, values = ~Population) %>%
  add_pie(hole = 0.6)
# p <- gender %>%
#
# filter(county == "Total") %>%
#
# pivot_longer( cols = c(male, female, intersex),
#
# names_to = "gender", values_to = "Population") %>%
#
# mutate(pct = round(Population/total,3),
#
# ymax = cumsum(pct),
#
# ymin = lag(ymax, default = 0),
#
# labelPosition = rowSums(across(c(ymax, ymin)))/2,
#
# label = paste0(gender,":\n", percent(pct))) %>%
#
# #label =percent(pct) ) %>%
#
# ggplot(aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=gender)) +
#
# geom_rect() +
#
# geom_text( x=2, aes(y=labelPosition, label=label, color=gender), size=4) + # x here controls label position (inner / outer)
#
# scale_fill_brewer(palette="BuPu") +
#
# # scale_color_brewer(palette=3) +
#
# coord_polar(theta="y") +
#
# xlim(c(-1, 4)) +
#
# theme_void()
#
```
### Proportion of Males in Each County
```{r Proportion of males by County}
# Share of males in each county against the county's total population, with
# a red reference line at 50% (the "Total" row is excluded).
p <- gender %>%
  filter(county != "Total") %>%
  # Only the male share is plotted; the unused female share is not computed.
  mutate(pct_male = male / total) %>%
  ggplot(aes(total, pct_male, text = county)) +
  # Plain points rather than geom_jitter() so markers sit exactly where their
  # labels and tooltip values say they are.
  geom_point() +
  geom_text(aes(label = county), size = 2.5) +
  geom_hline(yintercept = 0.5, color = "red") +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = percent)

# plotly only accepts "<top|middle|bottom> <left|center|right>" for
# textposition; a bare "right" is not one of the accepted values.
ggplotly(p, tooltip = c("x", "y", "text")) %>%
  style(textposition = "middle right")
```
Farming {data-orientation=columns data-icon="fa-seedling"}
=======================================================================
Column {.tabset}
-----------------------------------------------------------------------
```{r Crops Clean}
## Crops as rows
# Reshape the crops table to one row per county/crop pair with the number of
# households growing that crop. Missing counts and the national "Kenya" row
# are dropped.
crops_clean <- crops %>%
  # Every column except the county key is coerced to numeric.
  mutate(across(-county, as.numeric)) %>%
  pivot_longer(
    cols = -c(farming, county),
    names_to = "crop",
    values_to = "households"
  ) %>%
  filter(!is.na(households), county != "Kenya") %>%
  mutate(
    county = str_to_title(county),
    county = fct_reorder(county, households, sum),
    # `.desc` is fct_reorder()'s argument; a bare `desc` would be passed on to
    # sum() instead of controlling the ordering.
    crop = fct_reorder(crop, households, sum, .desc = FALSE)
  )
```
### Distribution of crops being grown
```{r overall crops breakdown}
# Households summed per crop over all counties, shown as horizontal bars.
# crops_clean has already had missing household counts removed.
crop_totals <- count(crops_clean, crop, wt = households, name = "households")
p <- ggplot(crop_totals, aes(x = households, y = crop, fill = crop)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "none")
ggplotly(p)
```
### 30 Heaviest Farming Counties
```{r Top 30 farming Counties}
# The 30 counties with the highest share of households engaged in farming,
# plotted against the county's number of households.
p <- crops_clean %>%
  left_join(household, by = "county") %>%
  mutate(percent_doing_farming = farming / number_of_households) %>%
  # crops_clean has one row per county/crop; collapse to one row per county.
  distinct(county, number_of_households, percent_doing_farming) %>%
  slice_max(order_by = percent_doing_farming, n = 30) %>%
  # `text = county` supplies the "text" entry requested in the tooltip below;
  # without it the hover shows only x and y.
  ggplot(aes(number_of_households, percent_doing_farming, text = county)) +
  geom_point() +
  #geom_text(aes(label = county), vjust = -1, hjust = 1)+
  geom_text(aes(label = county)) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = percent)

# plotly spells this position "top right" (space, not hyphen).
ggplotly(p, tooltip = c("x", "y", "text")) %>%
  style(textposition = "top right")
```
### 17 Lowest Farming Counties
```{r 17 lowest farming Counties}
# The 17 counties with the lowest share of households engaged in farming,
# plotted against the county's number of households.
p <- crops_clean %>%
  left_join(household, by = "county") %>%
  mutate(percent_doing_farming = farming / number_of_households) %>%
  # crops_clean has one row per county/crop; collapse to one row per county.
  distinct(county, number_of_households, percent_doing_farming) %>%
  slice_min(order_by = percent_doing_farming, n = 17) %>%
  # `text = county` supplies the "text" entry requested in the tooltip below;
  # without it the hover shows only x and y.
  ggplot(aes(number_of_households, percent_doing_farming, text = county)) +
  geom_point() +
  #geom_text(aes(label = county), vjust = -1, hjust = 1)+
  geom_text(aes(label = county)) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = percent)

# plotly spells this position "top right" (space, not hyphen).
ggplotly(p, tooltip = c("x", "y", "text")) %>%
  style(textposition = "top right")
```
Column {.tabset}
-----------------------------------------------------------------------
### Magnitude of Farming Per Crop in Each County
```{r Visualize Crop Heatmap}
## Crop-by-county heatmap of household counts
# County/crop pairs with no row are filled in with zero households so every
# cell of the grid is drawn.
heat_cells <- complete(crops_clean, crop, county, fill = list(households = 0))

# The gradient runs from the darker orange (low counts) to the lighter one
# (high counts).
p <- ggplot(heat_cells, aes(x = crop, y = county, fill = households)) +
  geom_tile() +
  scale_fill_gradient(low = "#e6550d", high = "#fee6ce", guide = "colorbar") +
  scale_y_discrete(expand = c(0, 0)) +
  labs(x = "Crop", y = "", fill = "No. of HHs growing this crop") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
ggplotly(p)
```
### Top 5 Counties Growing Each Crop
```{r Visualize Top 5 Counties per Crop}
# For each crop, the five counties where the largest share of households grow
# it, in one facet per crop.
p <- crops_clean %>%
  left_join(household, by = "county") %>%
  # Here the share is households growing this crop / all households in county.
  mutate(percent_doing_farming = households / number_of_households) %>%
  group_by(crop) %>%
  slice_max(order_by = percent_doing_farming, n = 5) %>%
  # Grouping was only needed for the per-crop top 5.
  ungroup() %>%
  ggplot(aes(number_of_households, percent_doing_farming, text = county)) +
  geom_point() +
  geom_text(aes(label = county), size = 2.5) +
  scale_x_continuous(labels = unit_format(unit = "K", scale = 1e-3)) +
  scale_y_continuous(labels = percent) +
  facet_wrap(~crop, scales = "free_y", strip.position = "right") +
  # The y axis is free per facet, so its title, labels and ticks are hidden;
  # exact values remain available in the tooltip.
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 1)
  )

# plotly spells this position "top right" (space, not hyphen).
ggplotly(p, tooltip = c("x", "y", "text")) %>%
  style(textposition = "top right")
```